import re
import requests
import os
def get_data(i):
    """Download one page of the Douban latest-books listing.

    Args:
        i: Page number substituted into the ``p`` query parameter of the
            listing URL.

    Returns:
        The response body decoded as text. The HTTP status code is printed
        but not checked, so the body of an error response is returned as-is.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    url = "https://book.douban.com/latest?subcat=%E5%85%A8%E9%83%A8&p={}"
    # A browser-like User-Agent is sent with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'}
    print('正在下载第{}页'.format(i))
    page_url = url.format(i)
    print(page_url)
    # Without a timeout, requests waits forever on a stalled connection and
    # the whole crawl would hang on a single bad page.
    result = requests.get(page_url, headers=headers, timeout=10)
    print(result.status_code)
    return result.text

def parse_data(html_data):
    """Extract book records from the listing page HTML.

    Args:
        html_data: HTML text of one listing page.

    Returns:
        A list of ``(cover_url, detail_url, title, abstract)`` string tuples,
        one per matched ``<li class="media clearfix...">`` entry, in document
        order. The abstract is returned unstripped. An empty list is returned
        when nothing matches.
    """
    # Raw string literals keep ``\s`` as a regex token; in a normal string
    # literal it is an invalid escape sequence that newer Python versions
    # warn about.
    pattern = (
        r'<li\sclass="media\sclearfix.*?">.*?<img\sclass="subject-cover".*?src="(.*?jpg)"/>.*?'
        r'<a\sclass="fleft"\shref="(.*?)">(.*?)</a>.*?<p\sclass="subject-abstract\scolor-gray">(.*?)</p>'
    )
    # re.S lets ``.`` cross line breaks, since each entry spans several lines.
    return re.findall(pattern, html_data, re.S)

def _safe_filename(name):
    """Return *name* with characters that are unsafe in file names replaced by ``_``."""
    # Path separators, characters reserved on Windows, and control characters
    # would either make open() fail or place the file outside the target
    # directory.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name).strip(' .')
    return cleaned or 'untitled'


def save_data(result_data):
    """Print, log and download the cover image for each parsed book record.

    For every record the four fields are printed, the title, detail link,
    stripped abstract and cover URL are appended to ``data.txt`` (one per
    line), and the cover image is downloaded to ``Data/<title>.jpg``.

    Args:
        result_data: Sequence of records whose first four items are
            ``(cover_url, detail_url, title, abstract)``.

    Raises:
        requests.exceptions.RequestException: If an image request fails or
            does not complete within the timeout.
        OSError: If ``data.txt`` or an image file cannot be written.
    """
    if not result_data:
        return

    # Create the image directory once; exist_ok avoids the check-then-create
    # race of os.path.exists() followed by os.mkdir().
    os.makedirs("Data", exist_ok=True)

    for record in result_data:
        photo, title_link, title = record[0], record[1], record[2]
        author_details = record[3].strip()

        print('图片链接：', photo)
        print('标题链接：', title_link)
        print('标题：', title)
        print('作者详情：', author_details)
        print("=========")

        # Text information: appended per record so entries written so far
        # are on disk even if a later image download fails.
        with open('data.txt', "a", encoding='utf-8') as f:
            f.write(title + '\n')
            f.write(title_link + '\n')
            f.write(author_details + '\n')
            f.write(photo + '\n')

        # The image itself needs a second request for its binary content.
        # NOTE(review): this request sends no User-Agent header, unlike the
        # listing request - confirm the image host accepts that.
        img_data = requests.get(photo, timeout=10).content

        # The scraped title is sanitised before being used as a file name.
        image_path = os.path.join("Data", '{}.jpg'.format(_safe_filename(title)))
        with open(image_path, 'wb') as f:
            f.write(img_data)


if __name__ == '__main__':
    # Crawl page indices 0 through 11: fetch each page, parse it, and
    # persist the extracted records before moving on to the next one.
    for page_index in range(12):
        save_data(parse_data(get_data(page_index)))
